***************
* Title: gambia_ecd_edcc_table2.do
* Author: Todd Pugatch
* Description: replication code for Blimpo, Carneiro, Jervis, and Pugatch,
*	"Improving Access and Quality in Early Childhood Development Programs: 
*		Experimental Evidence from The Gambia"
*	for Economic Development and Cultural Change
* Inputs: ECD_3to6_Gambia_cleanv1.dta
* Outputs: gambia_ecd_edcc_table2.txt, gambia_ecd_edcc_table2[a-b].xls
* Notes: creates Table 2
****************
#delimit;
/* Session setup: record the start time, clear data, matrices, Mata and graphs
	from memory, close any open log, and open the log for this do-file.
	All statements from here on are terminated by a semicolon. */
/* NOTE(review): this file has no -version- statement. The -table ..., c()-
	calls further down use the older table syntax, so confirm which Stata
	release the replication targets and consider pinning it here. */
local start=`"$S_TIME"';
clear;
clear matrix;
clear mata;
graph drop _all;
cap log close;
set more off;
/*set directory:
	cd mydir
*/
/* Relative locations of the cleaned data and of the output files.
	Forward slashes are used as the directory separator because Stata accepts
	them on Windows, macOS and Unix, whereas backslashes only work on Windows. */
local data=`"Data/cleaned"';
local output=`"analysis/output"';
/* the log goes to the same output folder as the tables */
log using `output'/gambia_ecd_edcc_table2.txt, text replace;

* AGGREGATE ATTRITION ANALYSIS;
* load and prepare data;
/* a forward slash joins the folder and the file name so that the path resolves
	on every operating system, not only on Windows */
qui use `data'/ECD_3to6_Gambia_cleanv1, clear;

* sample sizes before defining eligibles (# of children and project sites);
/* site tags one observation per settlement_code, so its raw sum within a
	treatment group counts the distinct settlements in that group */
qui egen site=tag(settlement_code);
table treatment, c(freq rawsum site);
/* site is dropped here because it is rebuilt after the eligibility restrictions */
drop site;

/*NEED TO CHECK TREATMENT STATUS OF SETTLEMENT 37028. DISREGARD FOR NOW*/
/* note that this settlement is still included in the table printed just above */
qui drop if settlement_code==37028;

/* Classify every record into three sample groups:
	in_baseline     -- ip22 is neither 3 nor system missing
	in_endline_old  -- in endline and in baseline (original sample)
	in_endline_new  -- in endline but not in baseline (newly sampled) */
quietly {;
	generate in_baseline = (ip22!=. & ip22!=3);
	generate in_endline = (interview_result==1);
	generate in_endline_old = (in_baseline==1 & in_endline==1);
	generate in_endline_new = (in_baseline==0 & in_endline==1);
	/* an unresolved gender mismatch is counted as new to endline */
	replace in_endline_new = 1 if child_gender_mismatch_resolved==0 & in_endline_new==0;
	/* the original-sample flag is undefined (missing, not zero) for new children */
	replace in_endline_old = . if in_endline_new==1;
};

/* Restrict the data to eligible children. */
/* print the number of observations before any restriction is applied */
count;

/* Rule 1: a baseline child needs a date-of-birth age of at least 36 and below
	84 months (3-6 years); a child outside the baseline must have that age missing. */
keep if (in_baseline==0 & child_age_mths_dob==.)|
	(in_baseline==1 & child_age_mths_dob!=. & child_age_mths_dob>=36 & child_age_mths_dob<84);

/* Rule 2: a child new to endline stays only if the endline age is 48 to 96
	(4-8 at endline, a band wide enough to allow for reporting errors) or if the
	gender mismatch is unresolved. Everyone who is not new to endline stays. */
keep if in_endline_new==0|
	(in_endline_new==1 & (child_gender_mismatch_resolved==0|
		(selected_child_age!=. & selected_child_age>=48 & selected_child_age<=96)));

/* Sample sizes after the eligibility restrictions (children and project sites). */
/* site tags one observation per settlement so that its raw sum counts settlements */
quietly egen site = tag(settlement_code);
/* number of baseline and of endline children within each settlement */
bysort settlement_code: egen sitecount_baseline = total(in_baseline);
bysort settlement_code: egen sitecount_endline = total(in_endline);
table treatment, contents(freq rawsum site);

/* counts by treatment status of baseline children, original-sample endline
	children, newly sampled endline children, and settlements */
table treatment, contents(rawsum in_baseline rawsum in_endline_old rawsum in_endline_new rawsum site);

/* Same sample counts, now requiring a valid MDAT fine motor or language/hearing score. */
/*note that "in sample" means at least one valid MDAT score, not both as in previous analyses of attrition*/
quietly {;
	/* baseline flag is left missing for children who are new to endline */
	generate in_baseline_mdat = (in_baseline==1 & (zfinemotor_baseline!=. | zlanghear_baseline!=.))
		if in_endline_new!=1;

	/* endline flags for all endline children, the original sample, and the new sample */
	generate in_endline_mdat = (in_endline==1 & (zfinemotor_endline!=. | zlanghear_endline!=.));
	generate in_endline_old_mdat = (in_endline_old==1 & (zfinemotor_endline!=. | zlanghear_endline!=.));
	generate in_endline_new_mdat = (in_endline_new==1 & (zfinemotor_endline!=. | zlanghear_endline!=.));
	replace in_endline_old_mdat = . if in_endline_new==1;

	/* in endline MDAT, defined only for children who are also in baseline MDAT */
	generate in_endline_old_mdat_base = in_endline_old_mdat if in_baseline_mdat==1;
};
table treatment, contents(rawsum in_baseline_mdat rawsum in_endline_old_mdat rawsum in_endline_new_mdat
	rawsum in_endline_old_mdat_base);

/* IS ATTRITION SYSTEMATIC BY TREATMENT STATUS? */
/* Forms of attrition built below:
	1) in baseline but not in endline
	2) missing baseline test score
	3) missing endline test score (regardless of baseline status)
	4) missing endline test score (conditional on being in baseline)
   Forms (2)-(4) are conditioned on being selected for the test, so they are
   left missing where s15q12 (baseline) or q812 (endline) equals 2. */
quietly {;
	generate attrit_endline = 1 - in_endline_old;

	generate miss_baseline_mdat = 1 - in_baseline_mdat if s15q12!=2;
	generate miss_endline_mdat = 1 - in_endline_mdat if q812!=2;

	/* endline score missing, undefined for children new to endline */
	generate miss_endline_mdat_inbaseline = miss_endline_mdat if in_endline_new!=1;

	/* both the baseline and the endline score are missing */
	generate miss_bothscores = (miss_endline_mdat==1 & miss_baseline_mdat==1);

	/* not in endline MDAT, overall and excluding children without a baseline MDAT score */
	generate attrit_endline_mdat = 1 - in_endline_old_mdat;
	generate attrit_endline_mdat_base = 1 - in_endline_old_mdat if in_baseline_mdat!=0;
};

/* outcome list passed to the balance tables */
local X "attrit_endline miss_baseline_mdat miss_endline_mdat miss_endline_mdat_inbaseline attrit_endline_mdat_base";

/* One 0/1 indicator per study arm, keyed on the codes of the treatment variable. */
quietly {;
	generate purecontrol = treatment==1;
	generate ECDAnnex_control = treatment==4;
	generate ECDAnnex_treated = treatment==5;
	generate communitybased = treatment==6;
};

/* Table 2 panels: means of the attrition outcomes in X and their differences by
	treatment status. File a compares ECD Annex treated with ECD Annex control and
	file b compares community-based with pure control. Output paths are joined
	with a forward slash so that they resolve on every operating system. */

* unadjusted means & differences by treatment status;
/*account for correlated outcomes by settlement by clustering at that level*/
/*ECD Annex treated v. ECD Annex control*/
orth_out `X' if ECDAnnex_control==1|ECDAnnex_treated==1 using `output'/gambia_ecd_edcc_table2a.xls, 
	by(treatment) se vce(cluster settlement_code) compare count colnum title("unadjusted means") replace;

/*community-based v. pure control*/
orth_out `X' if purecontrol==1|communitybased==1 using `output'/gambia_ecd_edcc_table2b.xls, 
	by(treatment) se vce(cluster settlement_code) compare count colnum title("unadjusted means") replace;
	
* adjusted means, adjusting for regional stratification; 
* compare to regressions in gambia_ecd_attrition[2-3].do to verify equivalence of p-values;
/* NOTE(review): happend is given together with replace in the two calls below.
	Confirm against the orth_out help file that the adjusted panel is appended
	beside the unadjusted panel rather than overwriting it. */
/*ECD Annex treated v. ECD Annex control*/
orth_out `X' if ECDAnnex_control==1|ECDAnnex_treated==1 using `output'/gambia_ecd_edcc_table2a.xls, 
	by(treatment) se vce(cluster settlement_code) compare test count colnum covar(region2) title("adjusted means") 
	happend replace;

/*community-based v. pure control*/
orth_out `X' if purecontrol==1|communitybased==1 using `output'/gambia_ecd_edcc_table2b.xls, 
	by(treatment) se vce(cluster settlement_code) compare test count colnum covar(region2) title("adjusted means") 
	happend replace;
	
	
/* Print the start and finish clock times of the run, then close the log. */
local end "$S_TIME";
display "`start'";
display "`end'";
log close;
